## ---------------------------
##
## Script name: National_Analysis.R
##
## Description: Outputs all figures for National Analysis
##
## ---------------------------
## DEPENDENCIES

require(dplyr)
library(tidyverse)
library(ggplot2)
library(sf)
library(tigris)
library(stringr)
library(grid)
library(gtable)
library(kableExtra)

# Script-level setup. Expects a `file_locations` list to already exist in the
# calling environment; it is read here but not defined in this file.

# Load the Nation graphing helpers. file.path() is used so the path does not
# end up with a doubled separator ("<fp>//R/Nation/...").
source(file.path(file_locations$current_fp, "R", "Nation", "Nation_graphing_helpers.R"))

# kable defaults: render NA cells as blank and emit LaTeX output.
options(knitr.kable.NA = '')
options(knitr.table.format = "latex")

# Directory that receives the generated tables.
figure_fp <- file.path(file_locations$current_fp, "figures/Nation")

#---------------------- LOAD DATA ---------------------------
# shootings: tract IDs are left-padded with zeros to 11 characters so they
# match the demographics keys below.
shootings_path <- file.path(file_locations$current_fp,
                            file_locations$Nation$Shootings$processed)
shootings <- read.csv(shootings_path) %>%
  mutate(tract_2010 = str_pad(tract_2010, 11, side = "left", pad = "0"))

# demographics (same tract ID padding)
demos_path <- file.path(file_locations$current_fp,
                        file_locations$Nation$Demographics$processed)
demos <- read.csv(demos_path) %>%
  mutate(tract_2010 = str_pad(tract_2010, 11, side = "left", pad = "0"))

# join shootings + demographics; only tract/year/city rows present in both
# inputs are kept (inner join).
shootings <- shootings %>%
  inner_join(demos, by = c("tract_2010", "year", "city"))

#------------------------------------------------------------

# Builds the two national summary tables and writes them as PDFs into
# `figure_fp`:
#   * Table3_2020_vs_2014_nation_shootings.pdf
#   * Table4_nation_compounded_disadvantage.pdf
#
# Args:
#   file_locations: nested list of paths. This function reads `current_fp`,
#     Nation$`Police Shootings`$processed and
#     Nation$`Male Incarceration`$processed from it.
#
# Relies on objects created at script level (`shootings`, `demos`,
# `figure_fp`) and on the dot-prefixed helpers `.generate_tract_labels_Nation`,
# `.calc_crime_rate`, `.generate_nbhd_rates_table_Nation` and
# `.generate_exposure_rates_table_Nation`, none of which are defined in this
# file (presumably they come from the sourced Nation_graphing_helpers.R).
#
# Returns: the value of the final save_kable() call; the function is called
# for its side effect of writing the two PDF files.
run_National_analysis <- function(file_locations) {

  # ---- local helpers --------------------------------------------------------

  # Read a processed CSV located under `current_fp` and left-pad the tract IDs
  # with zeros to 11 characters.
  read_tract_csv <- function(relative_path) {
    read.csv(file.path(file_locations$current_fp, relative_path)) %>%
      mutate(tract_2010 = str_pad(tract_2010, 11, side = "left", pad = "0"))
  }

  # Turn a long table with a `year` column into one row per (category, group)
  # with `2014` / `2020` columns plus absolute and percent change. Changes are
  # computed from the unrounded values; rounding is applied afterwards.
  summarise_change <- function(long_df, value_col) {
    long_df %>%
      tidyr::pivot_wider(names_from = "year",
                         values_from = dplyr::all_of(value_col)) %>%
      mutate(absolute_change = round(`2020` - `2014`, 2),
             relative_change = round(((`2020` - `2014`) / `2014`) * 100, 2),
             `2014` = round(`2014`, 2),
             `2020` = round(`2020`, 2)) %>%
      select(category, group, `2014`, `2020`, absolute_change, relative_change)
  }

  # Replace the internal group / category codes with display labels
  # (shared by the police-shooting and incarceration tables).
  label_nbhd_groups <- function(rates_table) {
    rates_table %>%
      mutate(group = recode(group, "overall_rate" = "Nation",
                            "first_quintile" = "First Quintile",
                            "middle_quintiles" = "Middle Quintiles",
                            "fifth_quintile" = "Fifth Quintile",
                            "poor" = "Poor",
                            "nonpoor" = "Nonpoor",
                            "majority_white" = "Majority White",
                            "majority_black" = "Majority Black",
                            "majority_hisp" = "Majority Hispanic",
                            "all_other_nonmajority" = "All Other Nbhds"),
             category = recode(category,
                               "Quintile" = "2014-2020 Quintile",
                               "Race" = "2014-2020 Race",
                               "Poverty" = "2014-2020 Poverty"))
  }

  # save_kable() needs the target directory to exist; create it up front so a
  # missing folder does not fail the run after all tables have been computed.
  if (!dir.exists(figure_fp)) {
    dir.create(figure_fp, recursive = TRUE)
  }

  category_levels <- c("Overall", "2014-2020 Race", "2014-2020 Poverty")

  #---------------------------------------------
  # 1. NBHD SHOOTINGS + EXPOSURE RATES BY GROUPS
  #---------------------------------------------

  # 1a. Average Neighborhood Shooting Rates

  # Tract labels over 2014-2020; computed once and reused in section 2.
  tract_labels <- .generate_tract_labels_Nation(shootings, "fatal_shootings",
                                                year_min = 2014, year_max = 2020,
                                                aggregate = TRUE)

  # calculate shooting rates per 10,000 for each city / tract / year
  rates <- .calc_crime_rate(df = shootings, per = 10000,
                            crime_col = "fatal_shootings",
                            population_col = "total_population",
                            grouping_vars = c("city", "tract_2010", "year")) %>%
    rename(shooting_rate = crime_rate)

  # join shootings+demos with rates
  shootings_rates <- shootings %>%
    inner_join(rates, by = c("city", "tract_2010", "year"))

  shootings_rates_w_demos <- shootings_rates %>%
    inner_join(tract_labels, by = c("city", "tract_2010"))

  # Keep only the two comparison years, drop the quintile rows, and blank out
  # the majority-AAPI rate (the row itself is kept so it still appears in the
  # table with an empty cell).
  df_2020_vs_2014_nbhd <-
    .generate_nbhd_rates_table_Nation(shootings_rates_w_demos, "shooting_rate") %>%
    filter(year == 2014 | year == 2020,
           category != "Quintile") %>%
    mutate(shooting_rate = ifelse(group == "majority_aapi", NA, shooting_rate)) %>%
    mutate(year = factor(year, levels = c("2014", "2020"))) %>%
    mutate(category = case_when(grepl("majority", group) ~ "2014-2020 Race",
                                grepl("poor", group) ~ "2014-2020 Poverty",
                                TRUE ~ "Overall"),
           category = factor(category, levels = category_levels),
           group = factor(group, levels = c("overall_rate", "majority_black",
                                            "majority_white", "majority_hisp",
                                            "majority_aapi",
                                            "all_other_nonmajority",
                                            "poor", "nonpoor")),
           group = recode(group, "overall_rate" = "Nation",
                          "poor" = "Poor",
                          "nonpoor" = "Nonpoor",
                          "majority_white" = "White",
                          "majority_black" = "Black",
                          "majority_hisp" = "Hispanic",
                          "majority_aapi" = "AAPI",
                          "all_other_nonmajority" = "Other"),
           shooting_rate = as.numeric(shooting_rate))

  subdf_nbhd <- summarise_change(df_2020_vs_2014_nbhd, "shooting_rate")


  # 1b. Average Exposure to Shooting Rates

  df_2020_vs_2014_exp <-
    .generate_exposure_rates_table_Nation(shootings_rates, "shooting_rate") %>%
    mutate(year = factor(year, levels = c("2014", "2020"))) %>%
    mutate(category = case_when(grepl("race", group) ~ "2014-2020 Race",
                                grepl("poverty", group) ~ "2014-2020 Poverty",
                                TRUE ~ "Overall"),
           category = factor(category, levels = category_levels),
           group = factor(group, levels = c("shootr_nation", "shootr_black_race",
                                            "shootr_white_race", "shootr_hisp_race",
                                            "shootr_aapi_race",
                                            "shootr_other_race",
                                            "shootr_poverty", "shootr_nonpoverty")),
           group = recode(group, "shootr_nation" = "Nation",
                          "shootr_poverty" = "Poor",
                          "shootr_nonpoverty" = "Nonpoor",
                          "shootr_white_race" = "White",
                          "shootr_black_race" = "Black",
                          "shootr_hisp_race" = "Hispanic",
                          "shootr_aapi_race" = "AAPI",
                          "shootr_other_race" = "Other"))

  subdf_exp <- summarise_change(df_2020_vs_2014_exp, "shootr")

  # Combine the neighborhood and exposure halves by (category, group) rather
  # than by row position, so a missing or extra group on either side cannot
  # silently shift values onto the wrong row. The exposure columns receive the
  # "_exp" suffix.
  table_shootings <- subdf_nbhd %>%
    left_join(subdf_exp, by = c("category", "group"), suffix = c("", "_exp")) %>%
    arrange(category, group)

  shootings_kbl <- kable(
    table_shootings %>% select(-category),
    align = c("l", "c", "c", "c", "c", "c", "c", "c", "c"),
    longtable = TRUE,
    booktabs = TRUE,
    col.names = c("", "2014", "2020", "Absolute Change", "% Change",
                  "2014", "2020", "Absolute Change", "% Change")
  ) %>%
    add_header_above(c(" " = 1,
                       "Rate of Neighborhood Shootings" = 4,
                       "Rate of Exposure to Shootings" = 4),
                     font_size = 11.5, bold = TRUE) %>%
    column_spec(1, italic = TRUE, width = "2cm") %>%
    row_spec(0, bold = TRUE) %>%
    column_spec(2:9, width = "1.69cm") %>%
    kable_classic(full_width = FALSE,
                  font_size = 8.5,
                  latex_options = c("striped", "repeat_header"),
                  position = "center")

  # Row groups follow the category factor order, which is also the sort order
  # applied to table_shootings above.
  landscape(shootings_kbl) %>%
    pack_rows(index = table(table_shootings$category)) %>%
    save_kable(file.path(figure_fp, "Table3_2020_vs_2014_nation_shootings.pdf"))

  #---------------------------------------------
  # 2. COMPOUNDED DISADVANTAGE BY GROUPS
  #---------------------------------------------

  # 2a. Average Police Shooting Rates

  ps_df <- read_tract_csv(file_locations$Nation$`Police Shootings`$processed) %>%
    inner_join(demos, by = c("city", "tract_2010", "year"))

  # Rates are grouped by city / tract only (no year), so the result is
  # labelled with the whole 2014-2020 period.
  ps_rates <- .calc_crime_rate(df = ps_df, per = 10000,
                               crime_col = "fatal_police_shootings",
                               population_col = "total_population",
                               grouping_vars = c("city", "tract_2010")) %>%
    rename(ps_rate = crime_rate) %>%
    mutate(year = "2014-2020")

  # `tract_labels` from section 1a is reused here; the inputs are identical.
  ps_rates_w_demos <- ps_rates %>%
    inner_join(tract_labels, by = c("city", "tract_2010"))

  table_ps <- .generate_nbhd_rates_table_Nation(ps_rates_w_demos, "ps_rate") %>%
    label_nbhd_groups() %>%
    mutate(ps_rate = round(ps_rate, 2)) %>%
    select(category, group, ps_rate) %>%
    arrange(category, group)

  # 2b. Average Male Incarceration Rates

  inc_rates <- read_tract_csv(file_locations$Nation$`Male Incarceration`$processed)

  inc_rates_w_demos <- tract_labels %>%
    inner_join(inc_rates, by = c("tract_2010"))

  table_inc <- .generate_nbhd_rates_table_Nation(inc_rates_w_demos, "incarc_rate") %>%
    filter(group != "majority_aapi") %>%
    label_nbhd_groups() %>%
    mutate(incarc_rate = round(incarc_rate, 0)) %>%
    select(category, group, incarc_rate) %>%
    arrange(category, group)

  # 2c. Join Police Shootings & Male Incarceration

  # Inner join: only (category, group) pairs present in both tables are shown.
  table_joined <- table_inc %>%
    inner_join(table_ps, by = c("category", "group")) %>%
    arrange(category, group)

  joined_kbl <- kable(
    table_joined %>% select(-category),
    align = c("l", "c", "c"),
    longtable = TRUE,
    booktabs = TRUE,
    col.names = c("", "2010 Male Incarceration Rate", "2014-2020 Police Shootings Rate")
  ) %>%
    column_spec(1, italic = TRUE, width = "3.2cm") %>%
    row_spec(0, bold = TRUE) %>%
    column_spec(2:3, width = "7cm") %>%
    kable_classic(full_width = FALSE,
                  font_size = 10,
                  latex_options = c("striped", "repeat_header"),
                  position = "center")

  landscape(joined_kbl) %>%
    pack_rows(index = table(table_joined$category)) %>%
    save_kable(file.path(figure_fp, "Table4_nation_compounded_disadvantage.pdf"))
}
  
